Source data: A. Tsanas and A. Xifara, "Accurate quantitative estimation of energy performance of residential buildings using statistical machine learning tools", Energy and Buildings, Vol. 49, pp. 560-567, 2012.
library(ggplot2)
# Load the energy-efficiency measurements; text columns stay as character
# vectors rather than being converted to factors.
energy <- read.csv(
  file = "EnergyEfficiencyData.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# List the column names available in the dataset.
names(energy)
## [1] "Relative.Compactness" "Surface.Area"
## [3] "Wall.Area" "Roof.Area"
## [5] "Overall.Height" "Orientation"
## [7] "Glazing.Area" "Glazing.Area.Distribution"
## [9] "Heating.Load" "Cooling.Load"
# Show the structure of the data frame: its dimensions plus, for each column,
# the storage type and the first few values.
str(energy)
## 'data.frame': 768 obs. of 10 variables:
## $ Relative.Compactness : num 0.98 0.98 0.98 0.98 0.9 0.9 0.9 0.9 0.86 0.86 ...
## $ Surface.Area : num 514 514 514 514 564 ...
## $ Wall.Area : num 294 294 294 294 318 ...
## $ Roof.Area : num 110 110 110 110 122 ...
## $ Overall.Height : num 7 7 7 7 7 7 7 7 7 7 ...
## $ Orientation : int 2 3 4 5 2 3 4 5 2 3 ...
## $ Glazing.Area : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Glazing.Area.Distribution: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Heating.Load : num 15.6 15.6 15.6 15.6 20.8 ...
## $ Cooling.Load : num 21.3 21.3 21.3 21.3 28.3 ...
The output shows that there are 768 rows (samples) and 10 columns: eight features plus the two response variables, Heating.Load and Cooling.Load.
# Quick check for missing values: is.na() flags every cell, and summary()
# then tabulates the TRUE/FALSE counts column by column.
summary(is.na(energy))
## Relative.Compactness Surface.Area Wall.Area Roof.Area
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:768 FALSE:768 FALSE:768 FALSE:768
## NA's :0 NA's :0 NA's :0 NA's :0
## Overall.Height Orientation Glazing.Area Glazing.Area.Distribution
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:768 FALSE:768 FALSE:768 FALSE:768
## NA's :0 NA's :0 NA's :0 NA's :0
## Heating.Load Cooling.Load
## Mode :logical Mode :logical
## FALSE:768 FALSE:768
## NA's :0 NA's :0
Per this summary, none of the columns contains any NAs.
# Scatter plots of each load (heating, then cooling) against Surface Area.
ggplot(energy, aes(x = Surface.Area, y = Heating.Load)) +
  geom_point() +
  labs(
    x = "Surface Area",
    y = "Heating Load",
    title = "Relationship between Heating Load and Surface Area"
  )
ggplot(energy, aes(x = Surface.Area, y = Cooling.Load)) +
  geom_point() +
  labs(
    x = "Surface Area",
    y = "Cooling Load",
    title = "Relationship between Cooling Load and Surface Area"
  )
# Scatter plots of Heating/Cooling Load against Surface Area, with the points
# coloured by Roof.Area and then by Relative.Compactness (each treated as a
# factor so every distinct value gets its own colour).
#
# In the heating plots `alpha = 0.3` is a constant, so it is passed to
# geom_point() directly. Placing it inside aes() would map it as if it were
# data and add a meaningless "alpha" legend instead of simply making the
# points translucent.
ggplot(energy, aes(y = Heating.Load, x = Surface.Area)) +
  geom_point(aes(color = as.factor(Roof.Area)), alpha = 0.3) +
  xlab("Surface Area") +
  ylab("Heating Load") +
  ggtitle("Impact of Surface Area and Roof.Area on Heating Load")
ggplot(energy, aes(y = Cooling.Load, x = Surface.Area)) +
  geom_point(aes(color = as.factor(Roof.Area))) +
  xlab("Surface Area") +
  ylab("Cooling Load") +
  ggtitle("Impact of Surface Area and Roof.Area on Cooling Load")
ggplot(energy, aes(y = Heating.Load, x = Surface.Area)) +
  geom_point(aes(color = as.factor(Relative.Compactness)), alpha = 0.3) +
  xlab("Surface Area") +
  ylab("Heating Load") +
  ggtitle("Impact of Surface Area and Relative Compactness on Heating Load ")
ggplot(energy, aes(y = Cooling.Load, x = Surface.Area)) +
  geom_point(aes(color = as.factor(Relative.Compactness))) +
  xlab("Surface Area") +
  ylab("Cooling Load") +
  ggtitle("Impact of Surface Area and Relative Compactness on Cooling Load ")
```r
# Heating Load against Surface Area, coloured by Overall.Height (as a factor).
# alpha is a fixed setting, so it goes outside aes(); inside aes() it would be
# mapped like data and produce a spurious "alpha" legend.
ggplot(energy, aes(y = Heating.Load, x = Surface.Area)) +
  geom_point(aes(color = as.factor(Overall.Height)), alpha = 0.3) +
  xlab("Surface Area") +
  ylab("Heating Load") +
  ggtitle("Impact of Surface Area and Overall Height on Heating Load ")
```
<img src="HW2_files/figure-html/unnamed-chunk-8-1.png" width="672" />
```r
# Cooling Load against Surface Area, coloured by Overall.Height (as a factor).
ggplot(energy, aes(x = Surface.Area, y = Cooling.Load)) +
  geom_point(aes(color = as.factor(Overall.Height))) +
  labs(
    x = "Surface Area",
    y = "Cooling Load",
    title = "Impact of Surface Area and Overall Height on Cooling Load "
  )
```
<img src="HW2_files/figure-html/unnamed-chunk-8-2.png" width="672" />
# Heating and Cooling Load against Surface Area, coloured by Orientation
# (treated as a factor).
ggplot(energy, aes(x = Surface.Area, y = Heating.Load)) +
  geom_point(aes(color = as.factor(Orientation))) +
  labs(
    x = "Surface Area",
    y = "Heating Load",
    title = "Impact of Surface Area and Orientation on Heating Load "
  )
ggplot(energy, aes(x = Surface.Area, y = Cooling.Load)) +
  geom_point(aes(color = as.factor(Orientation))) +
  labs(
    x = "Surface Area",
    y = "Cooling Load",
    title = "Impact of Surface Area and Orientation on Cooling Load "
  )
# Density views of both loads against each predictor.
#
# For every predictor the plots are produced in this order: heating hex bins,
# cooling hex bins, heating scatter + 2-D density contours, cooling scatter +
# contours. The title is built from the load that is actually plotted, so the
# Cooling Load plots are no longer mislabelled as "Heating Load".

# Build one density plot of column `y_var` against column `x_var` of `energy`.
#   x_var, y_var      column names, given as character strings
#   x_label, y_label  human-readable axis labels, also used in the title
#   style             "hex" for hexagonal bins (10 bins), or "contour" for
#                     the raw points overlaid with 2-D density contours
# Returns the ggplot object; the caller is responsible for printing it.
plot_load_density <- function(x_var, y_var, x_label, y_label,
                              style = c("hex", "contour")) {
  style <- match.arg(style)
  # Layers are created per call so no layer object is shared between plots.
  density_layers <- switch(style,
    hex = list(stat_binhex(bins = 10)),
    contour = list(geom_point(), geom_density2d())
  )
  ggplot(energy, aes_string(x = x_var, y = y_var)) +
    density_layers +
    xlab(x_label) +
    ylab(y_label) +
    ggtitle(paste("Density of points when comparing", y_label, "and", x_label))
}

# Column name -> display label, in the order the plots should appear.
density_predictors <- c(
  Relative.Compactness = "Relative Compactness",
  Surface.Area = "Surface Area",
  Wall.Area = "Wall Area",
  Roof.Area = "Roof Area",
  Overall.Height = "Overall Height",
  Orientation = "Orientation",
  Glazing.Area = "Glazing Area"
)
density_loads <- c(
  Heating.Load = "Heating Load",
  Cooling.Load = "Cooling Load"
)

for (predictor_var in names(density_predictors)) {
  for (plot_style in c("hex", "contour")) {
    for (load_var in names(density_loads)) {
      # print() is required: ggplot objects are not auto-printed inside loops.
      print(plot_load_density(
        x_var = predictor_var,
        y_var = load_var,
        x_label = density_predictors[[predictor_var]],
        y_label = density_loads[[load_var]],
        style = plot_style
      ))
    }
  }
}
# For every predictor column, plot Heating.Load against it twice: first as a
# scatter plot with 2-D density contours, then as a hexagonal-bin density plot.
#
# The two response columns are excluded: plotting Heating.Load against itself
# is a degenerate diagonal, and Cooling.Load is a response, not a predictor.
predictor_names <- setdiff(names(energy), c("Heating.Load", "Cooling.Load"))
for (name in predictor_names) {
  # aes_string() takes column names as strings, which is what the loop provides.
  heating_vs_predictor <- ggplot(
    energy,
    aes_string(y = "Heating.Load", x = name)
  )
  # print() is required: ggplot objects are not auto-printed inside loops.
  print(heating_vs_predictor + geom_point() + geom_density2d())
  print(heating_vs_predictor + stat_binhex(bins = 10))
}
# Heating Load against each predictor: raw points overlaid with 2-D density
# contours, one plot per predictor column.
ggplot(energy, aes(x = Relative.Compactness, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Surface.Area, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Wall.Area, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Roof.Area, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Overall.Height, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Orientation, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Glazing.Area, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
ggplot(energy, aes(x = Glazing.Area.Distribution, y = Heating.Load)) +
  geom_point() +
  geom_density2d()
# Heating Load against each predictor as hexagonal-bin density plots
# (10 bins), one plot per predictor column.
ggplot(energy, aes(x = Relative.Compactness, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Surface.Area, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Wall.Area, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Roof.Area, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Overall.Height, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Orientation, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Glazing.Area, y = Heating.Load)) +
  stat_binhex(bins = 10)
ggplot(energy, aes(x = Glazing.Area.Distribution, y = Heating.Load)) +
  stat_binhex(bins = 10)
# Heating Load against Surface Area with translucent points, so overlapping
# observations show up as darker spots. alpha is a fixed setting and is
# therefore passed to geom_point() directly (no stray empty argument).
ggplot(energy, aes(y = Heating.Load, x = Surface.Area)) +
  geom_point(alpha = 0.3)
# Same plot with point size mapped to Wall.Area; size varies with the data,
# so it belongs inside aes(), while alpha stays a constant outside it.
ggplot(energy, aes(y = Heating.Load, x = Surface.Area)) +
  geom_point(aes(size = Wall.Area), alpha = 0.3)